*==============================================================================*
* Regression Analysis
*==============================================================================*
clear all 

ssc install coefplot, replace
ssc install scheme-burd, replace
ssc install winsor2, replace
set scheme burd

*==============================================================================*
cd "P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities"
global project "P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities"
global data "${project}\Final datasets\"
global output "${project}\Final charts\Revision"


frame create dbtr_level 
frame change dbtr_level

use "${data}\dbtr_level_b_dt.dta", clear

* create new variable for size and drop the original
tab sz_f
clonevar size = sz_f
drop sz_f 
* drop missing size, nace, country code
tab size
drop if size == .
tab nace_code 
drop if nace_code == ""
tab cntry_dbtr
drop if cntry_dbtr == ""
* drop countries
drop if inlist(cntry_dbtr, "CY", "EE", "LT", "LU", "LV", "MT", "SI", "SK")


* encode size, nace, country
* size
gen sz = "Micro"
replace sz = "Large" if size == 1
replace sz = "Medium" if size == 2
replace sz = "Small" if size == 3
* nace
gen nace = "Agriculture"
replace nace = "Mining" if nace_code == "B"
replace nace = "Manufacturing" if nace_code == "C"
replace nace = "Electricity" if nace_code == "D"
replace nace = "Water" if nace_code == "E"
replace nace = "Construction" if nace_code == "F"
replace nace = "Wholesale and retail" if nace_code == "G"
replace nace = "Transportation" if nace_code == "H"
replace nace = "Accommodation and food services" if nace_code == "I"
replace nace = "Information and communication" if nace_code == "J"
replace nace = "Financial and insurance" if nace_code == "K"
replace nace = "Real estate" if nace_code == "L"
replace nace = "Professional activities" if nace_code == "M"
replace nace = "Administrative activities" if nace_code == "N"
replace nace = "Public administration" if nace_code == "O"
replace nace = "Education" if nace_code == "P"
replace nace = "Human health activities" if nace_code == "Q"
replace nace = "Arts and entertainment" if nace_code == "R"
replace nace = "Other services activities" if nace_code == "S"
replace nace = "Households as employers" if nace_code == "T"
replace nace = "Activities of international org." if nace_code == "U"

* winsorize number of creditors
sum nmbr_crdtrs, d
gen nmbr_w = nmbr_crdtrs
replace nmbr_w = 13 if nmbr_crdtrs > 13
sum nmbr_w nmbr_crdtrs

* check new variables
tab sz
tab nace

* encode to use in regressions
encode sz, gen(sz_f)
encode nace, gen(sctr_f)
encode cntry_dbtr, gen(cntry_f)

* check bank variables
sum i_size
sum b_spec_diff_dbtr

* check data set
tab cntry_dbtr

* calculate mean value
sum nmbr_w mb_shr


*==============================================================================*
* Instrument level
*==============================================================================*

** change to instrument level
frame create instr_level 
frame change instr_level

use "${data}\instr_level_b_dt.dta", clear

* create new variable for size and drop the original
tab sz_f
clonevar size = sz_f
drop sz_f 
* drop missing size, nace, country code
tab size
drop if size == .
tab nace_code 
drop if nace_code == ""
tab cntry_dbtr
drop if cntry_dbtr == ""
* drop countries
drop if inlist(cntry_dbtr, "CY", "EE", "LT", "LU", "LV", "MT", "SI", "SK")


* encode size, nace, country
* size
gen sz = "Micro"
replace sz = "Large" if size == 1
replace sz = "Medium" if size == 2
replace sz = "Small" if size == 3
* nace
gen nace = "Agriculture"
replace nace = "Mining" if nace_code == "B"
replace nace = "Manufacturing" if nace_code == "C"
replace nace = "Electricity" if nace_code == "D"
replace nace = "Water" if nace_code == "E"
replace nace = "Construction" if nace_code == "F"
replace nace = "Wholesale and retail" if nace_code == "G"
replace nace = "Transportation" if nace_code == "H"
replace nace = "Accommodation and food services" if nace_code == "I"
replace nace = "Information and communication" if nace_code == "J"
replace nace = "Financial and insurance" if nace_code == "K"
replace nace = "Real estate" if nace_code == "L"
replace nace = "Professional activities" if nace_code == "M"
replace nace = "Administrative activities" if nace_code == "N"
replace nace = "Public administration" if nace_code == "O"
replace nace = "Education" if nace_code == "P"
replace nace = "Human health activities" if nace_code == "Q"
replace nace = "Arts and entertainment" if nace_code == "R"
replace nace = "Other services activities" if nace_code == "S"
replace nace = "Households as employers" if nace_code == "T"
replace nace = "Activities of international org." if nace_code == "U"

* check new variables
tab sz
tab nace

* encode to use in regressions
encode sz, gen(sz_f)
encode nace, gen(sctr_f)
encode cntry_dbtr, gen(cntry_f)

* drop missing wir
drop if wir_byloantype == .
sum wir_byloantype, d


*Loan type
drop if typ_instr_corr == "Revolving credit"
drop if typ_instr_corr == "Finance leases"
drop if typ_instr_corr == "Trade receivables"
tab typ_instr_corr
encode typ_instr_corr, gen(instr_f)

* calculate loan type share within debtor
egen dbtr_ona = total(ona_orgnlmtrty_c_ttl), by(dbtr_id)
gen ONA_DBTR_LOAN_SHR = ona_orgnlmtrty_c_ttl/dbtr_ona

* drop if missing loan share
sum ONA_DBTR_LOAN_SHR
drop if ONA_DBTR_LOAN_SHR == .

* summarize target variable
sum mtrty_byloantype wir_byloantype
tabulate cntry_f, summarize(mtrty_byloantype) 
* these mean values are closer to the estimations

* calculate mean value
/*
preserve
	egen ona_w = total(ONA_W_MTRTY_BYTYPE_y * ONA_DBTR_LOAN_SHR), by (dbtr_id)
	duplicates drop ona_w, force
	sum ona_w
restore
*/

* check bank variables
sum i_size
sum b_spec_diff_dbtr


*==============================================================================*
* Regression table
*==============================================================================*

*** export regression table
frame change dbtr_level
eststo clear
	eststo: reg nmbr_w ibn.cntry_f  i_size hhi_nuts3 b_spec_diff_dbtr i.sz_f i.sctr_f , noconstant baselevels 	
	eststo: reg mb_shr ibn.cntry_f  i_size hhi_nuts3 b_spec_diff_dbtr i.sz_f i.sctr_f , noconstant baselevels 
	
* change to instr_level
frame change instr_level	
	eststo: reg mtrty_byloantype ibn.cntry_f  i_size hhi_nuts3 b_spec_diff_dbtr i.sz_f i.sctr_f i.instr_f [aweight=ONA_DBTR_LOAN_SHR] , noconstant baselevels 
	eststo: reg wir_byloantype ibn.cntry_f i_size hhi_nuts3 b_spec_diff_dbtr i.sz_f i.sctr_f i.instr_f [aweight=ONA_DBTR_LOAN_SHR] , noconstant baselevels //	
	#delimit ;
	esttab using "${output}\Regr_table_4vars_controls.tex",  ///
	replace label se star(* 0.10 ** 0.05 *** 0.01) stats(r2 N, fmt(2 %9.0fc) labels(\$R^2\$ Observations)) ///
	noomit nobase coeflabels(hhi_nuts3 "Bank concentration" i_size "Bank size" b_spec_diff_dbtr "Bank specialization" ) 
	booktabs compress nodepvar nomtitles 
	mlabels("Number of relationships" "Main bank share of credit" "Maturity" "Interest rate")
	indicate("Country coeff. = *.cntry_f") 
	substitute(\footnotesize \smaller[1] r2 R2)

;
#delimit cr

eststo clear
